{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PGVecto.rs\n",
    "\n",
    "This notebook shows how to use functionality related to the Postgres vector database ([pgvecto.rs](https://github.com/tensorchord/pgvecto.rs))."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install the pgvecto.rs SDK together with the LangChain packages this notebook imports.\n",
    "%pip install \"pgvecto_rs[sdk]\" langchain langchain-community langchain-text-splitters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List\n",
    "\n",
    "from langchain.docstore.document import Document\n",
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain_community.embeddings.fake import FakeEmbeddings\n",
    "from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs\n",
    "from langchain_text_splitters import CharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# FakeEmbeddings is a stand-in embedding model configured with size=3.\n",
    "# NOTE(review): presumably only suitable for a demo; use a real embedding model for meaningful results.\n",
    "embeddings = FakeEmbeddings(size=3)\n",
    "\n",
    "# Read the speech from disk, then split it with chunk_size=1000 and chunk_overlap=0.\n",
    "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "documents = loader.load()\n",
    "docs = text_splitter.split_documents(documents)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Start the database with the [official demo docker image](https://github.com/tensorchord/pgvecto.rs#installation)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "vscode": {
     "languageId": "shellscript"
    }
   },
   "outputs": [],
   "source": [
    "# Start a detached container named pgvecto-rs-demo from tensorchord/pgvecto-rs:latest,\n",
    "# setting POSTGRES_PASSWORD and publishing container port 5432 on host port 5432.\n",
    "# NOTE(review): presumably fails on re-run while a container with this name exists;\n",
    "# remove it first with `docker rm -f pgvecto-rs-demo` (confirm).\n",
    "! docker run --name pgvecto-rs-demo -e POSTGRES_PASSWORD=mysecretpassword -p 5432:5432 -d tensorchord/pgvecto-rs:latest"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then construct the database URL:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## PGVecto.rs needs the connection string to the database.\n",
    "## Each part is read from an environment variable, falling back to the\n",
    "## values used by the demo container started above.\n",
    "import os\n",
    "from urllib.parse import quote\n",
    "\n",
    "# Environment variables are always strings, so the port default is a string too.\n",
    "PORT = os.getenv(\"DB_PORT\", \"5432\")\n",
    "HOST = os.getenv(\"DB_HOST\", \"localhost\")\n",
    "USER = os.getenv(\"DB_USER\", \"postgres\")\n",
    "PASS = os.getenv(\"DB_PASS\", \"mysecretpassword\")\n",
    "DB_NAME = os.getenv(\"DB_NAME\", \"postgres\")\n",
    "\n",
    "# Percent-encode the credentials: an unescaped \"@\", \":\" or \"/\" in the user name\n",
    "# or password would otherwise be misread as part of the URL structure.\n",
    "URL = \"postgresql+psycopg://{username}:{password}@{host}:{port}/{db_name}\".format(\n",
    "    port=PORT,\n",
    "    host=HOST,\n",
    "    username=quote(USER, safe=\"\"),\n",
    "    password=quote(PASS, safe=\"\"),\n",
    "    db_name=DB_NAME,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally, create the VectorStore from the documents:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the vector store from the split documents.\n",
    "# The backing table is named f\"collection_{collection_name}\", so choose a\n",
    "# collection_name that is unique within the database.\n",
    "db1 = PGVecto_rs.from_documents(\n",
    "    collection_name=\"state_of_the_union\",\n",
    "    db_url=URL,\n",
    "    embedding=embeddings,\n",
    "    documents=docs,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can connect to the table later with:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reconnect by collection name: an existing collection in the database is reused,\n",
    "# otherwise a new, empty vector store is created under that name.\n",
    "# Pass the same arguments that were used when the vector store was first created.\n",
    "db1 = PGVecto_rs.from_collection_name(\n",
    "    collection_name=\"state_of_the_union\",\n",
    "    db_url=URL,\n",
    "    embedding=embeddings,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure that the user is permitted to create a table."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Similarity search with score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Similarity Search with Euclidean Distance (Default)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "\n",
    "# Keep the hits in their own variable: `docs` still holds the chunks that were\n",
    "# indexed earlier, so the indexing cell can be re-run without inserting search results.\n",
    "results: List[Document] = db1.similarity_search(query, k=4)\n",
    "for doc in results:\n",
    "    print(doc.page_content)\n",
    "    print(\"======================\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Similarity Search with Filter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pgvecto_rs.sdk.filters import meta_contains\n",
    "\n",
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "\n",
    "# Restrict the search to records whose metadata contains the given \"source\" value.\n",
    "# The hits go into `results` so that `docs` (the indexed chunks) is not overwritten.\n",
    "results: List[Document] = db1.similarity_search(\n",
    "    query, k=4, filter=meta_contains({\"source\": \"../../modules/state_of_the_union.txt\"})\n",
    ")\n",
    "\n",
    "for doc in results:\n",
    "    print(doc.page_content)\n",
    "    print(\"======================\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"What did the president say about Ketanji Brown Jackson\"\n",
    "\n",
    "# Same search, passing the metadata filter as a plain dict.\n",
    "# The hits go into `results` so that `docs` (the indexed chunks) is not overwritten.\n",
    "results: List[Document] = db1.similarity_search(\n",
    "    query, k=4, filter={\"source\": \"../../modules/state_of_the_union.txt\"}\n",
    ")\n",
    "\n",
    "for doc in results:\n",
    "    print(doc.page_content)\n",
    "    print(\"======================\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
